1 Set Working Directory

# Set knitr's working directory for all subsequent chunks.
# NOTE(review): machine-specific absolute path — breaks portability; consider
# the here package or a project-relative path.
knitr::opts_knit$set(root.dir = '/Users/kpitz/Projects/Gulf_of_California/Data_for_upload/')
getwd()
## [1] "/Users/kpitz/Projects/Gulf_of_California/Data_for_upload"

2 Import packages

library(biomformat)
library(multcomp)
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Loading required package: MASS
## 
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
## 
##     geyser
library(car)
## Loading required package: carData
library(ggpubr)
## Loading required package: ggplot2
## Loading required package: magrittr
library(ggplot2)
library(ggdendro)
library(phyloseq)
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.5-5
library(clustsig) #SIMPER/SIMPROF
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following object is masked from 'package:MASS':
## 
##     select
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:magrittr':
## 
##     extract
library(scales)
library(grid)
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(magrittr) # double pipes with dplyr
library(tibble)
library(cluster)

3 Read in Data, Rarefy, Export Rarefied Data

Data filtered and size limited (jupyter notebook) GOC_18S_Filter_Data.ipynb GOC_COI_Filter_Data.ipynb

3.1 COI

Exported Tables /Users/kpitz/Projects/Gulf_of_California/Decontaminated_tables/GOC_COI_seq_table_092519.csv /Users/kpitz/Projects/Gulf_of_California/Decontaminated_tables/GOC_COI_otu_table_092519.csv /Users/kpitz/Projects/Gulf_of_California/Decontaminated_tables/GOC_COI_taxa_table_092519.csv

# Data has been filtered in the python notebook.
# Assemble a phyloseq object from the filtered OTU counts, the taxonomy
# table, and the shared sample metadata (sample IDs as row names).
GOC_COI <- merge_phyloseq(
  otu_table(
    as.matrix(read.csv(file = "Filtered_Data/GOC_COI_otu_table_092519.csv",
                       row.names = 1, check.names = FALSE)),
    taxa_are_rows = TRUE
  ),
  tax_table(
    as.matrix(read.csv(file = "Filtered_Data/GOC_COI_taxa_table_092519.csv",
                       row.names = 1))
  ),
  sample_data(
    read.csv(file = "Combined_PCTD_Metadata_043019.csv", row.names = 1)
  )
)

GOC_COI
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 21402 taxa and 20 samples ]
## sample_data() Sample Data:       [ 20 samples by 54 sample variables ]
## tax_table()   Taxonomy Table:    [ 21402 taxa by 7 taxonomic ranks ]

Rarefy

#rarefy
# Drop samples below the read-count floor, then subsample every remaining
# sample down to the smallest library size so depths are even across samples.
rarefy_lim <- 10000 #set read limit for rarefaction
seed_num <- 678  #set arbitrary seed number for repeatability 
# Use <- for top-level assignment (consistent with the rest of the script).
GOC_COI_r <- prune_samples(sample_sums(GOC_COI) >= rarefy_lim, GOC_COI)
GOC_COI_r <- rarefy_even_depth(GOC_COI_r,
                               sample.size = min(sample_sums(GOC_COI_r)),
                               rngseed = seed_num, replace = FALSE,
                               trimOTUs = TRUE, verbose = TRUE)
## `set.seed(678)` was used to initialize repeatable random subsampling.
## Please record this for your records so others can reproduce.
## Try `set.seed(678); .Random.seed` for the full vector
## ...
## 1228OTUs were removed because they are no longer 
## present in any sample after random subsampling
## ...
GOC_COI_r
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20174 taxa and 20 samples ]
## sample_data() Sample Data:       [ 20 samples by 54 sample variables ]
## tax_table()   Taxonomy Table:    [ 20174 taxa by 7 taxonomic ranks ]
print(min(sample_sums(GOC_COI_r)))
## [1] 129363

Export Rarefied tables for Plotting

#save OTU table, taxa table
#Export tables for python analysis
#create matrix of Data acceptable to vegan (transposed so samples in rows)
# Extract the community matrix from a phyloseq object in the orientation
# vegan expects: samples in rows, taxa in columns.
vegan_otu <- function(physeq) {
  community <- otu_table(physeq)
  if (taxa_are_rows(community)) {
    community <- t(community)
  }
  as(community, "matrix")
}


#save rarefied; merged data
# Pull the rarefied COI data apart into a vegan matrix plus its component
# tables. Use <- consistently for assignment.
vegan_matrix_COI <- vegan_otu(GOC_COI_r)  # samples x taxa matrix for vegan
otu_tab_COI <- otu_table(GOC_COI_r)
tax_tab_COI <- tax_table(GOC_COI_r)
samp_dat_COI <- sample_data(GOC_COI_r)


#save to csv file (paths are machine-specific; kept commented out)
#write.csv(otu_tab_COI, '/Users/kpitz/Projects/MBON/Rarefied_Data_unmerged/GOC_COI_OTU_Table_092619_rare.csv')
#write.csv(tax_tab_COI, '/Users/kpitz/Projects/MBON/Rarefied_Data_unmerged/GOC_COI_taxa_table_092619_rare.csv')
#write.csv(samp_dat_COI, '/Users/kpitz/Projects/MBON/Rarefied_Data_unmerged/GOC_COI_samp_dat_092619_rare.csv')

3.2 18S

Exported Tables /Users/kpitz/Projects/Gulf_of_California/Decontaminated_tables/GOC_18S_seq_table_100119.csv /Users/kpitz/Projects/Gulf_of_California/Decontaminated_tables/GOC_18S_otu_table_100119.csv /Users/kpitz/Projects/Gulf_of_California/Decontaminated_tables/GOC_18S_taxa_table_100119.csv

# Data has been size limited and common contaminants removed in python;
# annotations come from the filtered file. Assemble the 18S phyloseq object
# from OTU counts, taxonomy, and the shared sample metadata.
GOC_18S <- merge_phyloseq(
  otu_table(
    as.matrix(read.csv(file = "Filtered_Data/GOC_18S_otu_table_100119.csv",
                       row.names = 1, check.names = FALSE)),
    taxa_are_rows = TRUE
  ),
  tax_table(
    as.matrix(read.csv(file = "Filtered_Data/GOC_18S_taxa_table_100119.csv",
                       row.names = 1))
  ),
  sample_data(
    read.csv(file = "Combined_PCTD_Metadata_043019.csv", row.names = 1)
  )
)

GOC_18S
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 44837 taxa and 20 samples ]
## sample_data() Sample Data:       [ 20 samples by 54 sample variables ]
## tax_table()   Taxonomy Table:    [ 44837 taxa by 7 taxonomic ranks ]

Rarefy

#rarefy
# Same procedure as for COI: drop low-depth samples, then rarefy all
# remaining samples to the minimum library size with a fixed seed.
rarefy_lim <- 15000 #set read limit for rarefaction
seed_num <- 678  #set arbitrary seed number for repeatability 
# Use <- for top-level assignment (consistent with the rest of the script).
GOC_18S_r <- prune_samples(sample_sums(GOC_18S) >= rarefy_lim, GOC_18S)
GOC_18S_r <- rarefy_even_depth(GOC_18S_r,
                               sample.size = min(sample_sums(GOC_18S_r)),
                               rngseed = seed_num, replace = FALSE,
                               trimOTUs = TRUE, verbose = TRUE)
## `set.seed(678)` was used to initialize repeatable random subsampling.
## Please record this for your records so others can reproduce.
## Try `set.seed(678); .Random.seed` for the full vector
## ...
## 18359OTUs were removed because they are no longer 
## present in any sample after random subsampling
## ...
GOC_18S_r
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 26478 taxa and 20 samples ]
## sample_data() Sample Data:       [ 20 samples by 54 sample variables ]
## tax_table()   Taxonomy Table:    [ 26478 taxa by 7 taxonomic ranks ]
print(min(sample_sums(GOC_18S_r)))
## [1] 28676

Export Rarefied tables for Plotting

#save OTU table, taxa table
#Export tables for python analysis
#create matrix of Data acceptable to vegan (transposed so samples in rows)
# Convert a phyloseq OTU table to the samples-in-rows matrix vegan expects.
# (Redefined here so this section can be run standalone.)
vegan_otu <- function(physeq) {
  tab <- otu_table(physeq)
  if (taxa_are_rows(tab)) {
    tab <- t(tab)
  }
  as(tab, "matrix")
}


#save rarefied; merged data
# Pull the rarefied 18S data apart into a vegan matrix plus its component
# tables. Use <- consistently for assignment.
vegan_matrix_18S <- vegan_otu(GOC_18S_r)  # samples x taxa matrix for vegan
otu_tab_18S <- otu_table(GOC_18S_r)
tax_tab_18S <- tax_table(GOC_18S_r)
samp_dat_18S <- sample_data(GOC_18S_r)


#save to csv file (paths are machine-specific; kept commented out)
#write.csv(otu_tab_18S, '/Users/kpitz/Projects/MBON/Rarefied_Data_unmerged/GOC_18S_OTU_Table_100119_rare.csv')
#write.csv(tax_tab_18S, '/Users/kpitz/Projects/MBON/Rarefied_Data_unmerged/GOC_18S_taxa_table_100119_rare.csv')
#write.csv(samp_dat_18S, '/Users/kpitz/Projects/MBON/Rarefied_Data_unmerged/GOC_18S_samp_dat_100119_rare.csv')

4 Begin Here to Recreate Figures using Rarefied Data

Import Rarefied data to edit figures

# Data previously rarefied — re-import the saved rarefied COI tables so the
# figure-generation sections can start here without re-running rarefaction.
GOC_COI_r <- merge_phyloseq(
  otu_table(
    as.matrix(read.csv(file = "Rarefied_Data/GOC_COI_OTU_Table_092619_rare.csv",
                       row.names = 1, check.names = FALSE)),
    taxa_are_rows = TRUE
  ),
  tax_table(
    as.matrix(read.csv(file = "Rarefied_Data/GOC_COI_taxa_table_092619_rare.csv",
                       row.names = 1))
  ),
  sample_data(
    read.csv(file = "Combined_PCTD_Metadata_043019.csv", row.names = 1)
  )
)

GOC_COI_r
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 20174 taxa and 20 samples ]
## sample_data() Sample Data:       [ 20 samples by 54 sample variables ]
## tax_table()   Taxonomy Table:    [ 20174 taxa by 7 taxonomic ranks ]
# Data previously rarefied — re-import the saved rarefied 18S tables.
GOC_18S_r <- merge_phyloseq(
  otu_table(
    as.matrix(read.csv(file = "Rarefied_Data/GOC_18S_OTU_Table_100119_rare.csv",
                       row.names = 1, check.names = FALSE)),
    taxa_are_rows = TRUE
  ),
  tax_table(
    as.matrix(read.csv(file = "Rarefied_Data/GOC_18S_taxa_table_100119_rare.csv",
                       row.names = 1))
  ),
  sample_data(
    read.csv(file = "Combined_PCTD_Metadata_043019.csv", row.names = 1)
  )
)

GOC_18S_r
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 26478 taxa and 20 samples ]
## sample_data() Sample Data:       [ 20 samples by 54 sample variables ]
## tax_table()   Taxonomy Table:    [ 26478 taxa by 7 taxonomic ranks ]
# Helper: community matrix with samples in rows / taxa in columns, as vegan
# requires. (Third definition — kept so the "Begin Here" section is standalone.)
vegan_otu <- function(physeq) {
  otu_mat <- otu_table(physeq)
  if (taxa_are_rows(otu_mat)) {
    otu_mat <- t(otu_mat)
  }
  as(otu_mat, "matrix")
}


#save rarefied; merged data
# Rebuild the vegan matrices and component tables from the re-imported
# rarefied phyloseq objects. Use <- consistently for assignment.
vegan_matrix_18S <- vegan_otu(GOC_18S_r)
otu_tab_18S <- otu_table(GOC_18S_r)
tax_tab_18S <- tax_table(GOC_18S_r)
samp_dat_18S <- sample_data(GOC_18S_r)

vegan_matrix_COI <- vegan_otu(GOC_COI_r)
otu_tab_COI <- otu_table(GOC_COI_r)
tax_tab_COI <- tax_table(GOC_COI_r)
samp_dat_COI <- sample_data(GOC_COI_r)

5 Transform sample metadata

#Transform Numeric Data in metadata
# Adds numeric copies of selected CTD variables under new names (TMP_C,
# NO3_um, oxy_ml, sigma_theta, DEPTH_M, Fluor) and coerces CHL_GFF and SAL
# in place.
# NOTE(review): as.numeric() on a factor column returns level codes, not the
# parsed values — confirm these columns are character/numeric here.
sample_data(GOC_COI_r) <- transform(sample_data(GOC_COI_r), TMP_C = as.numeric(TMP),
                                NO3_um = as.numeric(NO3),
                                CHL_GFF = as.numeric(CHL_GFF),
                                oxy_ml = as.numeric(OXY_ML),
                                sigma_theta = as.numeric(SIG_T),
                                DEPTH_M = as.numeric(DEPTH),
                                Fluor = as.numeric(FLUOR),
                                SAL = as.numeric(SAL)
)
#check modes of data
# mode() reports "numeric" for factors as well, so every column showing
# "numeric" below does not by itself prove the coercion parsed real values.
sapply(sample_data(GOC_COI_r), mode)
##              order       tag_sequence  primer_sequence_F 
##          "numeric"          "numeric"          "numeric" 
##  primer_sequence_R  library_tag_combo            library 
##          "numeric"          "numeric"          "numeric" 
##        sample_type              locus         tag_number 
##          "numeric"          "numeric"          "numeric" 
##                 R1                 R2          Treatment 
##          "numeric"          "numeric"          "numeric" 
##        Time_of_Day        Description      Description_3 
##          "numeric"          "numeric"          "numeric" 
##               site                SEQ             BOTTLE 
##          "numeric"          "numeric"          "numeric" 
##              DEPTH             CRUISE           PLATFORM 
##          "numeric"          "numeric"          "numeric" 
##            DEC_LAT           DEC_LONG                TMP 
##          "numeric"          "numeric"          "numeric" 
##                SAL            CHL_GFF           PRESSURE 
##          "numeric"          "numeric"          "numeric" 
##                NO3             OXY_ML               RDEP 
##          "numeric"          "numeric"          "numeric" 
##          TRANSMISS              SIG_T              FLUOR 
##          "numeric"          "numeric"          "numeric" 
##          DATE_TIME             cruise          SEQAvg_dg 
##          "numeric"          "numeric"          "numeric" 
##           AvgOfTMP         StDevOfTMP         CountOfTMP 
##          "numeric"          "numeric"          "numeric" 
##          AvgOfSAL1         StDevOfSAL         CountOfSAL 
##          "numeric"          "numeric"          "numeric" 
##          AvgOfCHLA        StDevOfCHLA        CountOfCHLA 
##          "numeric"          "numeric"          "numeric" 
##       AvgOfOXY_ML1     CountOfOXY_ML1      CountOfOXY_ML 
##          "numeric"          "numeric"          "numeric" 
##     AvgOfTRANSMISS   StDevOfTRANSMISS   CountOfTRANSMISS 
##          "numeric"          "numeric"          "numeric" 
##   AvgOfSIGMA_THETA StDevOfSIGMA_THETA CountOfSIGMA_THETA 
##          "numeric"          "numeric"          "numeric" 
##              TMP_C             NO3_um             oxy_ml 
##          "numeric"          "numeric"          "numeric" 
##        sigma_theta            DEPTH_M              Fluor 
##          "numeric"          "numeric"          "numeric"
samp_dat <- sample_data(GOC_COI_r)  # keep the transformed COI metadata at hand
#Transform Numeric Data in metadata
# Same transformation as for COI: numeric copies of the CTD variables.
# NOTE(review): as.numeric() on a factor returns level codes — verify column
# types before relying on these values.
sample_data(GOC_18S_r) <- transform(sample_data(GOC_18S_r), TMP_C = as.numeric(TMP),
                                NO3_um = as.numeric(NO3),
                                CHL_GFF = as.numeric(CHL_GFF),
                                oxy_ml = as.numeric(OXY_ML),
                                sigma_theta = as.numeric(SIG_T),
                                DEPTH_M = as.numeric(DEPTH),
                                Fluor = as.numeric(FLUOR),
                                SAL = as.numeric(SAL)
)
#check modes of data
# As above: mode() cannot distinguish factors from true numeric columns.
sapply(sample_data(GOC_18S_r), mode)
##              order       tag_sequence  primer_sequence_F 
##          "numeric"          "numeric"          "numeric" 
##  primer_sequence_R  library_tag_combo            library 
##          "numeric"          "numeric"          "numeric" 
##        sample_type              locus         tag_number 
##          "numeric"          "numeric"          "numeric" 
##                 R1                 R2          Treatment 
##          "numeric"          "numeric"          "numeric" 
##        Time_of_Day        Description      Description_3 
##          "numeric"          "numeric"          "numeric" 
##               site                SEQ             BOTTLE 
##          "numeric"          "numeric"          "numeric" 
##              DEPTH             CRUISE           PLATFORM 
##          "numeric"          "numeric"          "numeric" 
##            DEC_LAT           DEC_LONG                TMP 
##          "numeric"          "numeric"          "numeric" 
##                SAL            CHL_GFF           PRESSURE 
##          "numeric"          "numeric"          "numeric" 
##                NO3             OXY_ML               RDEP 
##          "numeric"          "numeric"          "numeric" 
##          TRANSMISS              SIG_T              FLUOR 
##          "numeric"          "numeric"          "numeric" 
##          DATE_TIME             cruise          SEQAvg_dg 
##          "numeric"          "numeric"          "numeric" 
##           AvgOfTMP         StDevOfTMP         CountOfTMP 
##          "numeric"          "numeric"          "numeric" 
##          AvgOfSAL1         StDevOfSAL         CountOfSAL 
##          "numeric"          "numeric"          "numeric" 
##          AvgOfCHLA        StDevOfCHLA        CountOfCHLA 
##          "numeric"          "numeric"          "numeric" 
##       AvgOfOXY_ML1     CountOfOXY_ML1      CountOfOXY_ML 
##          "numeric"          "numeric"          "numeric" 
##     AvgOfTRANSMISS   StDevOfTRANSMISS   CountOfTRANSMISS 
##          "numeric"          "numeric"          "numeric" 
##   AvgOfSIGMA_THETA StDevOfSIGMA_THETA CountOfSIGMA_THETA 
##          "numeric"          "numeric"          "numeric" 
##              TMP_C             NO3_um             oxy_ml 
##          "numeric"          "numeric"          "numeric" 
##        sigma_theta            DEPTH_M              Fluor 
##          "numeric"          "numeric"          "numeric"
samp_dat <- sample_data(GOC_18S_r)

6 Compare Diversity

Simpson, Shannon Diversity, Chao1 Richness

#Phyloseq
#COI  (label corrected: this chunk plots GOC_COI_r, titled "COI Rarefied")
# Alpha diversity (Shannon, Simpson, Chao1) per sample by sampling region,
# colored by time of day.
# NOTE(review): theme_bw() is applied AFTER theme(), so it resets the text
# size settings set earlier in the chain; move theme_bw() before theme() if
# those sizes are intended to take effect.
p_oBiom <- plot_richness(GOC_COI_r,x="Description", measures=c("Shannon","Simpson", "Chao1"), nrow=1, color='Time_of_Day') +
  theme(text = element_text(size=16), strip.text.x = element_text(size = 16), axis.text = element_text(size = rel(1), colour = "black")) +
  xlab("Sampling Region") + 
  ggtitle("COI Rarefied")+theme_bw()+scale_x_discrete(limits=c('PCNorth','EUNorth','EUSouth'))

# Overlay region-level boxplots on the per-sample points.
p_oBiom + geom_boxplot(data=p_oBiom$data, aes(x=Description, y=value, color=NULL), alpha=0.1)
## Warning: Removed 40 rows containing missing values (geom_errorbar).

#18S  (label corrected: this chunk plots GOC_18S_r, titled "18S Rarefied")
# Alpha diversity (Shannon, Simpson, Chao1) per sample by sampling region,
# colored by time of day.
# NOTE(review): as in the COI chunk, theme_bw() after theme() resets the
# earlier text-size settings.
p_oBiom <- plot_richness(GOC_18S_r,x="Description", measures=c("Shannon","Simpson", "Chao1"), nrow=1,color='Time_of_Day') +
  theme(text = element_text(size=16), strip.text.x = element_text(size = 16), axis.text = element_text(size = rel(1), colour = "black")) +
  xlab("Sampling Region") + 
  ggtitle("18S Rarefied")+theme_bw()+scale_x_discrete(limits=c('PCNorth','EUNorth','EUSouth'))

# Overlay region-level boxplots on the per-sample points.
p_oBiom + geom_boxplot(data=p_oBiom$data, aes(x=Description, y=value, color=NULL), alpha=0.1)
## Warning: Removed 40 rows containing missing values (geom_errorbar).

Create Dataframe of Shannon Diversity Data The following section draws from the tutorial: http://www.sthda.com/english/wiki/one-way-anova-test-in-r

6.1 COI

#Shannon
# Shannon diversity per sample (vegan::diversity on the samples-x-taxa
# matrix) — a named numeric vector keyed by sample name.
H <- diversity(vegan_matrix_COI)
H <- bind_rows(H)  # 1-row tibble: one column per sample
# NOTE(review): tbl_df() is deprecated; as_tibble() is the modern equivalent.
H <- tbl_df(H)

# Transpose the 1-row tibble to long form: `var` holds sample names and the
# value column is named "1" (the single row name) — renamed later.
# NOTE(review): gather()/spread() are superseded by pivot_longer()/pivot_wider().
H %<>%
   rownames_to_column %>%
   gather(var, value, -rowname) %>% 
   spread(rowname, value)

#Need to group by description
# Convert the S4 sample_data to a plain tibble (hence the class warning below).
samp_df <- tbl_df(samp_dat_COI)
## Warning in class(x) <- c(subclass, tibble_class): Setting class(x) to
## multiple strings ("tbl_df", "tbl", ...); result will no longer be an S4
## object
# Carry sample names as an explicit column for the join.
samp_df$sample_names <- rownames(samp_dat_COI)
#join by index
# Attach metadata to each sample's diversity value.
joined_df <- inner_join(H,samp_df, by=c('var'='sample_names'))

# Order the region factor south-to-north for plotting and contrasts.
joined_df$Description <- ordered(joined_df$Description,
                         levels = c("PCNorth", "EUNorth", "EUSouth"))
levels(joined_df$Description)
## [1] "PCNorth" "EUNorth" "EUSouth"
# Rename the "1" column created by the spread() transpose above.
joined_df = rename(joined_df, 'Diversity'='1')

joined_df$Diversity <- as.numeric(joined_df$Diversity)


# Boxplot of Shannon diversity by region (ggpubr).
ggboxplot(joined_df, x = "Description", y = "Diversity", 
          color = "Description", palette = c("#00AFBB", "#E7B800", "#FC4E07"),
          order = c("PCNorth", "EUNorth", "EUSouth"),
          ylab = "Diversity", xlab = "Description")

# Mean +/- SE line plot by region with jittered per-sample points.
ggline(joined_df, x = "Description", y = "Diversity", 
       add = c("mean_se", "jitter"), 
       order = c("PCNorth", "EUNorth", "EUSouth"),
       ylab = "Diversity", xlab = "Description")

# Same, split by time of day and colored by region (NaN warnings below come
# from groups with a single observation, where SE is undefined).
ggline(joined_df, x = "Time_of_Day", y = "Diversity", 
       add = c("mean_se", "jitter"), 
       order = c("day", "night", "transition"),
       color='Description',
       ylab = "Diversity", xlab = "Time_of_Day")
## Warning in stats::qt(ci/2 + 0.5, data_sum$length - 1): NaNs produced
## Warning in stats::qt(ci/2 + 0.5, data_sum$length - 1): NaNs produced

## Warning in stats::qt(ci/2 + 0.5, data_sum$length - 1): NaNs produced
## Warning: Removed 3 rows containing missing values (geom_errorbar).

ANOVA

# --- One-way ANOVA: Shannon diversity ~ sampling region (COI) ---
print('Region')
## [1] "Region"
# Compute the analysis of variance
res.aov <- aov(Diversity ~ Description, data = joined_df)
# Summary of the analysis
summary(res.aov)
##             Df Sum Sq Mean Sq F value  Pr(>F)   
## Description  2  7.509   3.755   8.906 0.00226 **
## Residuals   17  7.167   0.422                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pairwise Tukey HSD comparisons (run because the region effect is significant).
TukeyHSD(res.aov)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Diversity ~ Description, data = joined_df)
## 
## $Description
##                      diff        lwr      upr     p adj
## EUNorth-PCNorth 0.3089072 -0.7445535 1.362368 0.7363825
## EUSouth-PCNorth 1.3603450  0.4480213 2.272669 0.0036726
## EUSouth-EUNorth 1.0514378  0.1391140 1.963762 0.0228122
#Another method
#library(multcomp)
#perform multiple comparison procedures for an ANOVA. glht stands for general linear hypothesis tests
# Tukey contrasts via multcomp::glht — should agree with TukeyHSD() above.
summary(glht(res.aov, linfct = mcp(Description = "Tukey")))
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: aov(formula = Diversity ~ Description, data = joined_df)
## 
## Linear Hypotheses:
##                        Estimate Std. Error t value Pr(>|t|)   
## EUNorth - PCNorth == 0   0.3089     0.4106   0.752  0.73516   
## EUSouth - PCNorth == 0   1.3603     0.3556   3.825  0.00356 **
## EUSouth - EUNorth == 0   1.0514     0.3556   2.957  0.02267 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
# --- One-way ANOVA: Shannon diversity ~ time of day (COI) ---
print('Time of Day')
## [1] "Time of Day"
# Compute the analysis of variance
res.aov <- aov(Diversity ~ Time_of_Day, data = joined_df)
# Summary of the analysis
summary(res.aov)
##             Df Sum Sq Mean Sq F value Pr(>F)
## Time_of_Day  2  1.532  0.7662   0.991  0.392
## Residuals   17 13.144  0.7732
#TukeyHSD(res.aov)  # skipped: time-of-day effect not significant above

#Another method
#library(multcomp)
#perform multiple comparison procedures for an ANOVA. glht stands for general linear hypothesis tests
summary(glht(res.aov, linfct = mcp(Time_of_Day = "Tukey")))
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: aov(formula = Diversity ~ Time_of_Day, data = joined_df)
## 
## Linear Hypotheses:
##                         Estimate Std. Error t value Pr(>|t|)
## night - day == 0          0.1477     0.5202   0.284    0.956
## transition - day == 0    -0.4861     0.5676  -0.856    0.672
## transition - night == 0  -0.6338     0.4541  -1.396    0.363
## (Adjusted p values reported -- single-step method)

If ANOVA result is significant, compute Tukey HSD (Tukey Honest Significant Differences, R function: TukeyHSD()) for performing multiple pairwise-comparison between the means of groups.

The function TukeyHSD() takes the fitted ANOVA as an argument. diff: the difference between the means of the two groups; lwr, upr: the lower and upper end points of the confidence interval at 95% (default); p adj: the p-value after adjustment for the multiple comparisons.

Homogeneity of variances

# Re-fit the region model and check the equal-variance assumption.
res.aov <- aov(Diversity ~ Description, data = joined_df)
#Homogeneity of variances
# Residuals-vs-fitted diagnostic plot; look for heteroscedasticity.
plot(res.aov, 1)

# Levene's test (car): H0 = equal variances across groups.
leveneTest(Diversity ~ Description, data = joined_df)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  2  0.3052  0.741
##       17

From the output above we can see that the p-value is not less than the significance level of 0.05. This means that there is no evidence to suggest that the variance across groups is statistically significantly different. Therefore, we can assume the homogeneity of variances in the different treatment groups. http://www.sthda.com/english/wiki/one-way-anova-test-in-r

Homogeneity of variances

# Same variance check for the time-of-day model.
res.aov <- aov(Diversity ~ Time_of_Day, data = joined_df)
#Homogeneity of variances
plot(res.aov, 1)

# Levene's test: H0 = equal variances across time-of-day groups.
leveneTest(Diversity ~ Time_of_Day, data = joined_df)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  2  0.5187 0.6044
##       17

Look at correlation between environmental variables and diversity

#look at Temperature vs Diversity

#average by site
# Mean Shannon diversity per site (TMP is constant within site, so grouping
# on it keeps the temperature column). The result stays grouped by site —
# see "Groups: site [15]" in the printout below.
avgdf = joined_df %>% group_by(site, TMP) %>% summarize(Diversity = mean(Diversity))
avgdf
## # A tibble: 15 x 3
## # Groups:   site [15]
##    site     TMP Diversity
##    <fct>  <dbl>     <dbl>
##  1 CP23    20.9      3.69
##  2 GOC2.0  24.1      5.03
##  3 UC1     12.6      2.46
##  4 UC10    17.2      4.84
##  5 UC12    18.9      3.56
##  6 UC13    19.7      4.93
##  7 UC14    20.3      4.82
##  8 UC15    20.6      4.51
##  9 UC2     12.7      3.10
## 10 UC3     12.7      3.18
## 11 UC4     14.7      3.17
## 12 UC5     15.3      4.33
## 13 UC6     15.7      2.79
## 14 UC7     15.5      3.65
## 15 UC9     16.3      2.70
# Pearson correlation between site-mean diversity and temperature.
res <- cor.test(avgdf$Diversity, avgdf$TMP, 
                    method = "pearson")
res
## 
##  Pearson's product-moment correlation
## 
## data:  avgdf$Diversity and avgdf$TMP
## t = 3.834, df = 13, p-value = 0.002069
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3449324 0.9035574
## sample estimates:
##       cor 
## 0.7284763
# Scatter with regression line, CI band, and Pearson r annotation.
ggscatter(avgdf, x = "TMP", y = "Diversity", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.method = "pearson",
          xlab = "Temperature", ylab = "Shannon Diversity", label="site", font.label=(8))

# Saves the most recently displayed plot.
# NOTE(review): filename says "Bray" but this figure is Shannon diversity vs
# temperature — confirm the intended name.
ggsave('Figures/Bray_Temp_Corr_Banzai_COI.png')
## Saving 7 x 5 in image

Compare Diversity means 18S

Create Dataframe of Shannon Diversity Data The following section draws from the tutorial: http://www.sthda.com/english/wiki/one-way-anova-test-in-r

6.2 18S

#Shannon
# Shannon diversity per sample for 18S — same pipeline as the COI section.
H <- diversity(vegan_matrix_18S)
H <- bind_rows(H)  # 1-row tibble: one column per sample
# NOTE(review): tbl_df() is deprecated; as_tibble() is the modern equivalent.
H <- tbl_df(H)

# Transpose to long form: `var` = sample names, value column named "1".
# NOTE(review): gather()/spread() are superseded by pivot_longer()/pivot_wider().
H %<>%
   rownames_to_column %>%
   gather(var, value, -rowname) %>% 
   spread(rowname, value)

#Need to group by description
# Convert the S4 sample_data to a plain tibble (hence the class warning below).
samp_df <- tbl_df(samp_dat_18S)
## Warning in class(x) <- c(subclass, tibble_class): Setting class(x) to
## multiple strings ("tbl_df", "tbl", ...); result will no longer be an S4
## object
# Carry sample names as an explicit column for the join.
samp_df$sample_names <- rownames(samp_dat_18S)
#join by index
joined_df <- inner_join(H,samp_df, by=c('var'='sample_names'))

# Order the region factor for plotting and contrasts.
joined_df$Description <- ordered(joined_df$Description,
                         levels = c("PCNorth", "EUNorth", "EUSouth"))
levels(joined_df$Description)
## [1] "PCNorth" "EUNorth" "EUSouth"
# Rename the "1" column created by the spread() transpose above.
joined_df = rename(joined_df, 'Diversity'='1')

joined_df$Diversity <- as.numeric(joined_df$Diversity)


# Boxplot of 18S Shannon diversity by region (ggpubr).
ggboxplot(joined_df, x = "Description", y = "Diversity", 
          color = "Description", palette = c("#00AFBB", "#E7B800", "#FC4E07"),
          order = c("PCNorth", "EUNorth", "EUSouth"),
          ylab = "Diversity", xlab = "Description")

# Mean +/- SE line plot by region with jittered per-sample points.
ggline(joined_df, x = "Description", y = "Diversity", 
       add = c("mean_se", "jitter"), 
       order = c("PCNorth", "EUNorth", "EUSouth"),
       ylab = "Diversity", xlab = "Description")

# Same, split by time of day and colored by region (NaN warnings below come
# from groups with a single observation, where SE is undefined).
ggline(joined_df, x = "Time_of_Day", y = "Diversity", 
       add = c("mean_se", "jitter"), 
       order = c("day", "night", "transition"),
       color='Description',
       ylab = "Diversity", xlab = "Time_of_Day")
## Warning in stats::qt(ci/2 + 0.5, data_sum$length - 1): NaNs produced
## Warning in stats::qt(ci/2 + 0.5, data_sum$length - 1): NaNs produced

## Warning in stats::qt(ci/2 + 0.5, data_sum$length - 1): NaNs produced
## Warning: Removed 3 rows containing missing values (geom_errorbar).

ANOVA

# --- One-way ANOVA: Shannon diversity ~ sampling region (18S) ---
print('Region')
## [1] "Region"
# Compute the analysis of variance
res.aov <- aov(Diversity ~ Description, data = joined_df)
# Summary of the analysis
summary(res.aov)
##             Df Sum Sq Mean Sq F value  Pr(>F)   
## Description  2  3.494  1.7472   9.699 0.00155 **
## Residuals   17  3.062  0.1801                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pairwise Tukey HSD comparisons (run because the region effect is significant).
TukeyHSD(res.aov)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Diversity ~ Description, data = joined_df)
## 
## $Description
##                      diff        lwr       upr     p adj
## EUNorth-PCNorth 0.1809419 -0.5076938 0.8695776 0.7814324
## EUSouth-PCNorth 0.9166015  0.3202254 1.5129775 0.0028578
## EUSouth-EUNorth 0.7356596  0.1392835 1.3320356 0.0148404
#Another method
#library(multcomp)
#perform multiple comparison procedures for an ANOVA. glht stands for general linear hypothesis tests
# Tukey contrasts via multcomp::glht — should agree with TukeyHSD() above.
summary(glht(res.aov, linfct = mcp(Description = "Tukey")))
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: aov(formula = Diversity ~ Description, data = joined_df)
## 
## Linear Hypotheses:
##                        Estimate Std. Error t value Pr(>|t|)   
## EUNorth - PCNorth == 0   0.1809     0.2684   0.674  0.78036   
## EUSouth - PCNorth == 0   0.9166     0.2325   3.943  0.00275 **
## EUSouth - EUNorth == 0   0.7357     0.2325   3.164  0.01482 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
# --- One-way ANOVA: Shannon diversity ~ time of day (18S) ---
print('Time of Day')
## [1] "Time of Day"
# Compute the analysis of variance
res.aov <- aov(Diversity ~ Time_of_Day, data = joined_df)
# Summary of the analysis
summary(res.aov)
##             Df Sum Sq Mean Sq F value Pr(>F)  
## Time_of_Day  2  1.924  0.9619   3.529 0.0522 .
## Residuals   17  4.633  0.2725                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#TukeyHSD(res.aov)  # skipped: time-of-day effect only marginal above

#Another method
#library(multcomp)
#perform multiple comparison procedures for an ANOVA. glht stands for general linear hypothesis tests
summary(glht(res.aov, linfct = mcp(Time_of_Day = "Tukey")))
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: aov(formula = Diversity ~ Time_of_Day, data = joined_df)
## 
## Linear Hypotheses:
##                         Estimate Std. Error t value Pr(>|t|)  
## night - day == 0          0.4684     0.3088   1.517   0.3061  
## transition - day == 0    -0.2211     0.3370  -0.656   0.7901  
## transition - night == 0  -0.6895     0.2696  -2.558   0.0502 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

If ANOVA result is significant, compute Tukey HSD (Tukey Honest Significant Differences, R function: TukeyHSD()) for performing multiple pairwise-comparison between the means of groups.

The function TukeyHSD() takes the fitted ANOVA as an argument. diff: the difference between the means of the two groups; lwr, upr: the lower and upper end points of the confidence interval at 95% (default); p adj: the p-value after adjustment for the multiple comparisons.

Homogeneity of variances

# Re-fit the region model and check the equal-variance assumption.
res.aov <- aov(Diversity ~ Description, data = joined_df)
#Homogeneity of variances
# Residuals-vs-fitted diagnostic plot; look for heteroscedasticity.
plot(res.aov, 1)

# Levene's test (car): H0 = equal variances across groups.
leveneTest(Diversity ~ Description, data = joined_df)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  2   0.229 0.7977
##       17

From the output above we can see that the p-value is not less than the significance level of 0.05. This means that there is no evidence to suggest that the variance across groups is statistically significantly different. Therefore, we can assume the homogeneity of variances in the different treatment groups. http://www.sthda.com/english/wiki/one-way-anova-test-in-r

Homogeneity of variances

# Same variance check for the time-of-day model.
res.aov <- aov(Diversity ~ Time_of_Day, data = joined_df)
#Homogeneity of variances
plot(res.aov, 1)

# Levene's test: H0 = equal variances across time-of-day groups.
leveneTest(Diversity ~ Time_of_Day, data = joined_df)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  2  0.8562 0.4423
##       17

Look at correlation between environmental variables and diversity

#look at Temperature vs Diversity

#average by site
# Mean Shannon diversity per site; grouping on TMP keeps the temperature
# column. The result stays grouped by site (see printout below).
avgdf = joined_df %>% group_by(site, TMP) %>% summarize(Diversity = mean(Diversity))
avgdf
## # A tibble: 15 x 3
## # Groups:   site [15]
##    site     TMP Diversity
##    <fct>  <dbl>     <dbl>
##  1 CP23    20.9      3.69
##  2 GOC2.0  24.1      2.55
##  3 UC1     12.6      2.07
##  4 UC10    17.2      2.84
##  5 UC12    18.9      3.14
##  6 UC13    19.7      3.18
##  7 UC14    20.3      3.25
##  8 UC15    20.6      2.95
##  9 UC2     12.7      1.84
## 10 UC3     12.7      2.43
## 11 UC4     14.7      2.18
## 12 UC5     15.3      2.96
## 13 UC6     15.7      1.98
## 14 UC7     15.5      2.81
## 15 UC9     16.3      2.15
# Pearson correlation between site-mean diversity and temperature.
res <- cor.test(avgdf$Diversity, avgdf$TMP, 
                    method = "pearson")
res
## 
##  Pearson's product-moment correlation
## 
## data:  avgdf$Diversity and avgdf$TMP
## t = 3.0606, df = 13, p-value = 0.009113
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2017633 0.8707458
## sample estimates:
##       cor 
## 0.6471404
# Scatter with regression line, CI band, and Pearson r annotation.
ggscatter(avgdf, x = "TMP", y = "Diversity", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.method = "pearson",
          xlab = "Temperature", ylab = "Shannon Diversity", label="site", font.label=(8))

# Saves the most recently displayed plot.
# NOTE(review): filename says "Bray" but this figure is Shannon diversity vs
# temperature — confirm the intended name.
ggsave('Figures/Bray_Temp_Corr_Banzai_18S.png')
## Saving 7 x 5 in image

7 Dendrogram Plots

SIMPROF Analysis 18S

#create hierarchical cluster diagram - example
# Bray-Curtis dissimilarities between samples, then average-linkage clustering.
dist.mat <- vegdist(vegan_matrix_18S, method="bray", binary=FALSE)
clust.res<-hclust(dist.mat, method="average")
plot(clust.res)

#SIMPROF: gives clusters that pass a significance threshold
########################################
# NOTE(review): the SIMPROF run uses complete linkage and (non-standardized)
# "braycurtis", unlike the average-linkage example above — the warnings below
# come from that distance choice.
simprof_mat_18S = simprof(vegan_matrix_18S, num.expected=100, num.simulated=99, 
                      method.cluster="complete", method.distance = "braycurtis", 
                      alpha = 0.05, sample.orientation = "row")
## Warning: This version of the Bray-Curtis index does not use
## standardization.
## Warning: To use the standardized version, use "actual-braycurtis".
## Warning: See the help documentation for more information.
#visualize with colored hierarchical cluster diagram by sample name
plot_sim <- simprof.plot(simprof_mat_18S)

# Print the SIMPROF result (number of groups, p-values, significant clusters).
simprof_mat_18S
## $numgroups
## [1] 11
## 
## $pval
##       [,1] [,2]       [,3]
##  [1,]   -3  -19 1.00000000
##  [2,]  -16  -20 1.00000000
##  [3,]  -14  -15 1.00000000
##  [4,]  -17    1 0.00000000
##  [5,]   -1   -2         NA
##  [6,]   -6   -8         NA
##  [7,]   -7    4 0.00000000
##  [8,]    5    6 0.06060606
##  [9,]  -10  -13 1.00000000
## [10,]   -4    7 0.00000000
## [11,]  -18    2 0.00000000
## [12,]  -11  -12         NA
## [13,]    8   10 0.00000000
## [14,]   -9   12 0.78787879
## [15,]   -5   13 0.02020202
## [16,]    9   11 0.00000000
## [17,]   14   16 0.00000000
## [18,]    3   17 0.00000000
## [19,]   15   18 0.00000000
## 
## $hclust
## 
## Call:
## hclust(d = rawdata.dist, method = method.cluster)
## 
## Cluster method   : complete 
## Number of objects: 20 
## 
## 
## $significantclusters
## $significantclusters[[1]]
## [1] "UC5"
## 
## $significantclusters[[2]]
## [1] "UC1" "UC2" "UC6" "UC9"
## 
## $significantclusters[[3]]
## [1] "UC4"
## 
## $significantclusters[[4]]
## [1] "UC7"
## 
## $significantclusters[[5]]
## [1] "UC3_2"
## 
## $significantclusters[[6]]
## [1] "UC3"   "UC3_1"
## 
## $significantclusters[[7]]
## [1] "GOC2a" "GOC2b"
## 
## $significantclusters[[8]]
## [1] "UC10" "UC13" "UC14"
## 
## $significantclusters[[9]]
## [1] "UC12" "UC15"
## 
## $significantclusters[[10]]
## [1] "CP23_2"
## 
## $significantclusters[[11]]
## [1] "CP23"   "CP23_1"
#simprof returns a list of 4 components (numgroups, pval, hclust,
#significantclusters); a single significant cluster can be accessed as
#simprof_mat_18S$significantclusters[[i]] (equivalently simprof_mat_18S[[4]][[i]])

#Map these results to an nmds plot
simprofCLUSTERS_18S <- data.frame()

#keep only clusters containing more than one sample (singletons are dropped)
for (j in seq_along(simprof_mat_18S$significantclusters)) {
  cluster_members <- simprof_mat_18S$significantclusters[[j]]
  if (length(cluster_members) > 1) {
    simprofCLUSTERS_18S <- rbind(simprofCLUSTERS_18S,
                                 cbind(j, cluster_members))
  }
}

#create df with samples as index (taken from the sample-name column) and
#clusterID as "simprofCLUSTER"
rownames(simprofCLUSTERS_18S) <- simprofCLUSTERS_18S[,2]
colnames(simprofCLUSTERS_18S) <- c("simprofCLUSTER", "group")

simprofCLUSTERS_18S
##        simprofCLUSTER  group
## UC1                 2    UC1
## UC2                 2    UC2
## UC6                 2    UC6
## UC9                 2    UC9
## UC3                 6    UC3
## UC3_1               6  UC3_1
## GOC2a               7  GOC2a
## GOC2b               7  GOC2b
## UC10                8   UC10
## UC13                8   UC13
## UC14                8   UC14
## UC12                9   UC12
## UC15                9   UC15
## CP23               11   CP23
## CP23_1             11 CP23_1
#Plot with ggplot2

library(ggplot2)
library(ggdendro)

# Extract plottable segment/label coordinates from the simprof hclust tree
# (simprof_mat_18S[[3]] is the $hclust component).
dendr <- dendro_data(simprof_mat_18S[[3]], type="rectangle") 

#Define cluster based on similarity percent - good for plotting
clust <- cutree(simprof_mat_18S[[3]], h = 40)               # find 'cut' clusters (k) or choose level (h) numeric scalar or vector with heights where the tree should be cut.
clust2 <- cutree(simprof_mat_18S[[3]], h = 60)
clust3 <- cutree(simprof_mat_18S[[3]], h = 20)  
clust4 <- cutree(simprof_mat_18S[[3]], h = 80)  

# One row per sample; columns record its cluster membership at each cut height.
clust.df <- data.frame(label = names(clust), cluster_40 = clust, cluster_60 = clust2, cluster_20 = clust3, cluster_80 = clust4)
# Quick mode check of the columns (a factor column reports mode "numeric").
sapply(clust.df, mode)
##      label cluster_40 cluster_60 cluster_20 cluster_80 
##  "numeric"  "numeric"  "numeric"  "numeric"  "numeric"
clust.df$label <- as.character(clust.df$label)
clust.df <- clust.df[order(clust.df$label),]

#join sample data with cluster df and simprofCLUSTERS df:
# by=0 merges on row names.
tree_scores <- merge(clust.df, samp_dat,by=0)
rownames(tree_scores) <- tree_scores$Row.names
#remove duplicated column after assigning index
tree_scores$Row.names <- NULL
#merge with simprof cluster df
# all=TRUE keeps samples whose simprof cluster was a singleton (NA clusterID).
tree_scores <- merge(tree_scores, simprofCLUSTERS_18S,by=0, all=TRUE )


#dendr$labels is df of x,y and attributes -> add to it with metadata (tree_scores)
tree_data_18S <- merge(dendr$labels, tree_scores,by="label")
tree_data_18S$Site <- tree_data_18S$Row.names

# Open an SVG graphics device; everything plotted until dev.off() goes to file.
svglite::svglite("Figures/Dendro_18S_Banzai.svg", height=4.5, width=4.5)

# Colorblind-friendly palette for the three Description groups.
cbPalette <- c("#56B4E9", "#E69F00", "#009E73")
ggplot() + 
  # Dendrogram branches
  geom_segment(data=segment(dendr), aes(x=x, y=y, xend=xend, yend=yend)) +
  # Leaf markers: color = region (Description), shape = sampling time
  geom_point(data=tree_data_18S, aes(x=x, y=y, color=Description, shape=Time_of_Day),alpha=1, size=5)+ 
  geom_text(data=tree_data_18S, aes(x=x, y=y, label=Site, hjust=-.4), size=3) +
  #geom_text(data=tree_data_18S, aes(x=x, y=y, label=simprofCLUSTER, hjust=3), size=5) +
  # Reference lines at the cutree heights used above (h = 20/40/60)
  geom_hline(yintercept=20, color="blue", linetype = "longdash", alpha=0.8) +
  geom_hline(yintercept=40, color="green", linetype = "longdash", alpha=0.8) +
  geom_hline(yintercept=60, color="orange", linetype = "longdash", alpha=0.8) +
  # Flip + reverse so the tree reads left-to-right with labels at the tips
  coord_flip() + scale_y_reverse(expand=c(2, 0)) +
  scale_colour_manual(values=cbPalette) +
  theme(axis.line.y=element_blank(),
        axis.ticks.y=element_blank(),
        axis.text.y=element_blank(),
        axis.title.y=element_blank(),
        panel.background=element_rect(fill="white"),
        panel.grid=element_blank())

# Close the SVG device.
dev.off()
## quartz_off_screen 
##                 2
# Re-render the 18S dendrogram inline (same figure as the SVG written above).
cbPalette <- c("#56B4E9", "#E69F00", "#009E73")
ggplot() +
  # tree branches
  geom_segment(data = segment(dendr),
               aes(x = x, y = y, xend = xend, yend = yend)) +
  # tips: color = region, shape = time of day
  geom_point(data = tree_data_18S,
             aes(x = x, y = y, color = Description, shape = Time_of_Day),
             alpha = 1, size = 5) +
  geom_text(data = tree_data_18S,
            aes(x = x, y = y, label = Site, hjust = -.4), size = 3) +
  # guide lines at the three cutree heights
  geom_hline(yintercept = 60, color = "orange", linetype = "longdash", alpha = 0.8) +
  geom_hline(yintercept = 40, color = "green", linetype = "longdash", alpha = 0.8) +
  geom_hline(yintercept = 20, color = "blue", linetype = "longdash", alpha = 0.8) +
  coord_flip() +
  scale_y_reverse(expand = c(2, 0)) +
  scale_colour_manual(values = cbPalette) +
  theme(panel.background = element_rect(fill = "white"),
        panel.grid = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.text.y = element_blank(),
        axis.title.y = element_blank())

SIMPROF Analysis COI

#create hierarchical cluster diagram - example
dist.mat <- vegdist(vegan_matrix_COI, method="bray", binary=FALSE)
clust.res<-hclust(dist.mat, method="average")
plot(clust.res)

#SIMPROF: gives clusters that pass a significance threshold
########################################
# NOTE(review): as with 18S, the exploratory tree uses average linkage while
# simprof uses complete linkage -- the two dendrograms will differ.
simprof_mat_COI = simprof(vegan_matrix_COI, num.expected=100, num.simulated=99, 
                      method.cluster="complete", method.distance = "braycurtis", 
                      alpha = 0.05, sample.orientation = "row")
## Warning: This version of the Bray-Curtis index does not use
## standardization.
## Warning: To use the standardized version, use "actual-braycurtis".
## Warning: See the help documentation for more information.
#visualize with colored hierarchical cluster diagram by sample name
plot_sim <- simprof.plot(simprof_mat_COI)

# Print the full simprof result (numgroups, pval table, hclust, clusters).
simprof_mat_COI
## $numgroups
## [1] 9
## 
## $pval
##       [,1] [,2]      [,3]
##  [1,]   -3  -19 1.0000000
##  [2,]  -16  -20 1.0000000
##  [3,]   -1  -17        NA
##  [4,]   -6   -8        NA
##  [5,]   -2   -7 1.0000000
##  [6,]    1    5 0.0000000
##  [7,]    3    4 0.7474747
##  [8,]   -4    6 0.0000000
##  [9,]   -9  -11        NA
## [10,]    7    8 0.0000000
## [11,]  -14  -15 1.0000000
## [12,]  -12    9        NA
## [13,]  -18    2 0.0000000
## [14,]  -10  -13        NA
## [15,]   12   14 0.3030303
## [16,]   -5   10 0.0000000
## [17,]   11   15 0.0000000
## [18,]   13   17 0.0000000
## [19,]   16   18 0.0000000
## 
## $hclust
## 
## Call:
## hclust(d = rawdata.dist, method = method.cluster)
## 
## Cluster method   : complete 
## Number of objects: 20 
## 
## 
## $significantclusters
## $significantclusters[[1]]
## [1] "UC5"
## 
## $significantclusters[[2]]
## [1] "UC1"   "UC3_2" "UC6"   "UC9"  
## 
## $significantclusters[[3]]
## [1] "UC4"
## 
## $significantclusters[[4]]
## [1] "UC3"   "UC3_1"
## 
## $significantclusters[[5]]
## [1] "UC2" "UC7"
## 
## $significantclusters[[6]]
## [1] "CP23_2"
## 
## $significantclusters[[7]]
## [1] "CP23"   "CP23_1"
## 
## $significantclusters[[8]]
## [1] "GOC2a" "GOC2b"
## 
## $significantclusters[[9]]
## [1] "UC14" "UC10" "UC13" "UC12" "UC15"
#simprof returns a list of 4 components (numgroups, pval, hclust,
#significantclusters); a single significant cluster can be accessed as
#simprof_mat_COI$significantclusters[[i]] (equivalently simprof_mat_COI[[4]][[i]])

#Map these results to an nmds plot
simprofCLUSTERS_COI <- data.frame()

#keep only clusters containing more than one sample (singletons are dropped)
for (j in seq_along(simprof_mat_COI$significantclusters)) {
  cluster_members <- simprof_mat_COI$significantclusters[[j]]
  if (length(cluster_members) > 1) {
    simprofCLUSTERS_COI <- rbind(simprofCLUSTERS_COI,
                                 cbind(j, cluster_members))
  }
}

#create df with samples as index (taken from the sample-name column) and
#clusterID as "simprofCLUSTER"
rownames(simprofCLUSTERS_COI) <- simprofCLUSTERS_COI[,2]
colnames(simprofCLUSTERS_COI) <- c("simprofCLUSTER", "group")

simprofCLUSTERS_COI
##        simprofCLUSTER  group
## UC1                 2    UC1
## UC3_2               2  UC3_2
## UC6                 2    UC6
## UC9                 2    UC9
## UC3                 4    UC3
## UC3_1               4  UC3_1
## UC2                 5    UC2
## UC7                 5    UC7
## CP23                7   CP23
## CP23_1              7 CP23_1
## GOC2a               8  GOC2a
## GOC2b               8  GOC2b
## UC14                9   UC14
## UC10                9   UC10
## UC13                9   UC13
## UC12                9   UC12
## UC15                9   UC15

COI

#Plot with ggplot2
library(ggplot2)
library(ggdendro)

# Extract plottable coordinates from the COI simprof hclust tree.
# NOTE: this overwrites the `dendr` built for 18S above, so run in order.
dendr <- dendro_data(simprof_mat_COI[[3]], type="rectangle", compress=TRUE) 

#Define cluster based on similarity percent - good for plotting
clust <- cutree(simprof_mat_COI[[3]], h = 40)               # find 'cut' clusters (k) or choose level (h) numeric scalar or vector with heights where the tree should be cut.
clust2 <- cutree(simprof_mat_COI[[3]], h = 60)
clust3 <- cutree(simprof_mat_COI[[3]], h = 20)  
clust4 <- cutree(simprof_mat_COI[[3]], h = 80)  

# One row per sample; columns record its cluster membership at each cut height.
clust.df <- data.frame(label = names(clust), cluster_40 = clust, cluster_60 = clust2, cluster_20 = clust3, cluster_80 = clust4)
# Quick mode check of the columns (a factor column reports mode "numeric").
sapply(clust.df, mode)
##      label cluster_40 cluster_60 cluster_20 cluster_80 
##  "numeric"  "numeric"  "numeric"  "numeric"  "numeric"
clust.df$label <- as.character(clust.df$label)
clust.df <- clust.df[order(clust.df$label),]

#join sample data with cluster df and simprofCLUSTERS df:
# by=0 merges on row names.
tree_scores <- merge(clust.df, samp_dat,by=0)
rownames(tree_scores) <- tree_scores$Row.names
#remove duplicated column after assigning index
tree_scores$Row.names <- NULL
#merge with simprof cluster df
# all=TRUE keeps samples whose simprof cluster was a singleton (NA clusterID).
tree_scores <- merge(tree_scores, simprofCLUSTERS_COI,by=0, all=TRUE )


#dendr$labels is df of x,y and attributes -> add to it with metadata (tree_scores)
tree_data_COI <- merge(dendr$labels, tree_scores,by="label")
tree_data_COI$Site <- tree_data_COI$Row.names

# Open an SVG graphics device; everything plotted until dev.off() goes to file.
svglite::svglite("Figures/Dendro_COI_Banzai.svg", height=4.5, width=4.5)

# Colorblind-friendly palette for the three Description groups.
cbPalette <- c("#56B4E9", "#E69F00", "#009E73")
ggplot() + 
  # Dendrogram branches
  geom_segment(data=segment(dendr), aes(x=x, y=y, xend=xend, yend=yend)) +
  # Leaf markers: color = region (Description), shape = sampling time
  geom_point(data=tree_data_COI, aes(x=x, y=y, color=Description, shape=Time_of_Day),alpha=1, size=5)+ 
  geom_text(data=tree_data_COI, aes(x=x, y=y, label=Site, hjust=-.4), size=3) +
  #geom_text(data=tree_data_COI, aes(x=x, y=y, label=simprofCLUSTER, hjust=4), size=5) +
  # Reference lines at the cutree heights used above (h = 20/40/60)
  geom_hline(yintercept=20, color="blue", linetype = "longdash", alpha=0.8) +
  geom_hline(yintercept=40, color="green", linetype = "longdash", alpha=0.8) +
  geom_hline(yintercept=60, color="orange", linetype = "longdash", alpha=0.8) +
  #coord_flip() + scale_y_reverse(expand=c(0.2, 0)) +
  coord_flip() + scale_y_reverse(expand=c(2, 0)) +
  scale_colour_manual(values=cbPalette) +
  theme(axis.line.y=element_blank(),
        axis.ticks.y=element_blank(),
        axis.text.y=element_blank(),
        axis.title.y=element_blank(),
        panel.background=element_rect(fill="white"),
        panel.grid=element_blank())

# Close the SVG device.
dev.off()
## quartz_off_screen 
##                 2
# Re-render the COI dendrogram inline (same figure as the SVG written above).
cbPalette <- c("#56B4E9", "#E69F00", "#009E73")
ggplot() +
  # tree branches
  geom_segment(data = segment(dendr),
               aes(x = x, y = y, xend = xend, yend = yend)) +
  # tips: color = region, shape = time of day
  geom_point(data = tree_data_COI,
             aes(x = x, y = y, color = Description, shape = Time_of_Day),
             alpha = 1, size = 5) +
  geom_text(data = tree_data_COI,
            aes(x = x, y = y, label = Site, hjust = -.4), size = 3) +
  # guide lines at the three cutree heights
  geom_hline(yintercept = 60, color = "orange", linetype = "longdash", alpha = 0.8) +
  geom_hline(yintercept = 40, color = "green", linetype = "longdash", alpha = 0.8) +
  geom_hline(yintercept = 20, color = "blue", linetype = "longdash", alpha = 0.8) +
  coord_flip() +
  scale_y_reverse(expand = c(2, 0)) +
  scale_colour_manual(values = cbPalette) +
  theme(panel.background = element_rect(fill = "white"),
        panel.grid = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.text.y = element_blank(),
        axis.title.y = element_blank())

8 NMDS Analysis

Nonmetric Multidimensional Scaling

8.1 COI

#make NMDS plot
set.seed(12) #set seed if want reproducible plots, otherwise random
# NMDS in 2 dimensions on the COI community matrix; metaMDS applies a sqrt
# transform and Wisconsin double standardization by default (see run log below).
NMDS_analysis=metaMDS(vegan_matrix_COI, # Our community-by-species matrix
                      k=2, trymax = 50) # The number of reduced dimensions
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.08573922 
## Run 1 stress 0.06770388 
## ... New best solution
## ... Procrustes: rmse 0.07626857  max resid 0.3042716 
## Run 2 stress 0.09269221 
## Run 3 stress 0.06781486 
## ... Procrustes: rmse 0.01031494  max resid 0.03471772 
## Run 4 stress 0.09234719 
## Run 5 stress 0.08355816 
## Run 6 stress 0.08550555 
## Run 7 stress 0.07060257 
## Run 8 stress 0.09264433 
## Run 9 stress 0.06781491 
## ... Procrustes: rmse 0.01031213  max resid 0.03468699 
## Run 10 stress 0.06770411 
## ... Procrustes: rmse 9.588694e-05  max resid 0.0002067728 
## ... Similar to previous best
## Run 11 stress 0.09720211 
## Run 12 stress 0.06770389 
## ... Procrustes: rmse 0.0002954924  max resid 0.000846243 
## ... Similar to previous best
## Run 13 stress 0.07069544 
## Run 14 stress 0.0835996 
## Run 15 stress 0.07060265 
## Run 16 stress 0.09089074 
## Run 17 stress 0.07069541 
## Run 18 stress 0.06781491 
## ... Procrustes: rmse 0.01032165  max resid 0.03476408 
## Run 19 stress 0.07057182 
## Run 20 stress 0.08574041 
## *** Solution reached
## Warning in postMDS(out$points, dis, plot = max(0, plot - 1), ...): skipping
## half-change scaling: too few points below threshold
# Shepard plot: observed dissimilarities vs ordination distances (fit check).
stressplot(NMDS_analysis)

species.scores <- as.data.frame(scores(NMDS_analysis, "species"))  #Using the scores function from vegan to extract the species scores and convert to a data.frame
species.scores$species <- rownames(species.scores)  # create a column of species, represents OTU IDs
nmds_sp_scores <- merge(species.scores, tax_tab_COI,by="row.names", all=TRUE)
data.scores <- as.data.frame(scores(NMDS_analysis))  #Using the scores function from vegan to extract the site scores and convert to a data.frame
# Re-index the dendrogram metadata by sample label so site scores merge on row names.
rownames(tree_data_COI) <- tree_data_COI$label
nmds_scores <- merge(data.scores, tree_data_COI,by="row.names", all=TRUE)

Rename NMDS data to save for later.

# Stash the COI NMDS scores before nmds_scores is overwritten by the 18S run.
nmds_COI <- nmds_scores

Plot

# COI NMDS: points colored by region (Description), shaped/labelled by Treatment.
ggplot() + 
  geom_point(data=nmds_COI,aes(x=NMDS1,y=NMDS2, colour=Description, shape=Treatment),size=7,alpha=0.9) + # add the point markers
  geom_text(data=nmds_COI,aes(x=NMDS1,y=NMDS2,label=Treatment),size=3,vjust=0.4) +  # add the site labels
  scale_colour_brewer(palette = "Dark2")+
  coord_equal() +
  theme_bw()

# Same plot with per-region t-distribution ellipses (dashed outline + shaded fill).
ggplot() + 
  stat_ellipse(data=nmds_COI,aes(x=NMDS1,y=NMDS2, colour=Description),type = "t", linetype=2) +
  stat_ellipse(data=nmds_COI,aes(x=NMDS1,y=NMDS2, fill=Description),geom= "polygon", alpha=0.2) +
  geom_point(data=nmds_COI,aes(x=NMDS1,y=NMDS2, colour=Description, shape=Treatment),size=7,alpha=0.9) + # add the point markers
  geom_text(data=nmds_COI,aes(x=NMDS1,y=NMDS2,label=Treatment),size=3,vjust=0.4) +  # add the site labels
  scale_colour_brewer(palette = "Dark2")+
  scale_fill_brewer(palette = "Dark2")+
  coord_equal() +
  theme_bw()

8.2 18S

#make NMDS plot
set.seed(12) #set seed if want reproducible plots, otherwise random
# Same seed and settings as the COI run so the two ordinations are comparable.
NMDS_analysis=metaMDS(vegan_matrix_18S, # Our community-by-species matrix
                      k=2, trymax = 50) # The number of reduced dimensions
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1051481 
## Run 1 stress 0.105086 
## ... New best solution
## ... Procrustes: rmse 0.004438413  max resid 0.01566353 
## Run 2 stress 0.1136906 
## Run 3 stress 0.112558 
## Run 4 stress 0.1096126 
## Run 5 stress 0.1069758 
## Run 6 stress 0.1183514 
## Run 7 stress 0.1091666 
## Run 8 stress 0.1209838 
## Run 9 stress 0.1112415 
## Run 10 stress 0.1208403 
## Run 11 stress 0.1091898 
## Run 12 stress 0.112558 
## Run 13 stress 0.1051482 
## ... Procrustes: rmse 0.004509224  max resid 0.01589073 
## Run 14 stress 0.11228 
## Run 15 stress 0.1091667 
## Run 16 stress 0.107567 
## Run 17 stress 0.1164657 
## Run 18 stress 0.116579 
## Run 19 stress 0.1166531 
## Run 20 stress 0.1081893 
## Run 21 stress 0.1144916 
## Run 22 stress 0.109323 
## Run 23 stress 0.1143064 
## Run 24 stress 0.1133838 
## Run 25 stress 0.1154821 
## Run 26 stress 0.1087038 
## Run 27 stress 0.1141413 
## Run 28 stress 0.112558 
## Run 29 stress 0.1076255 
## Run 30 stress 0.115341 
## Run 31 stress 0.1051481 
## ... Procrustes: rmse 0.004394558  max resid 0.01551965 
## Run 32 stress 0.105086 
## ... New best solution
## ... Procrustes: rmse 7.173461e-06  max resid 2.415254e-05 
## ... Similar to previous best
## *** Solution reached
## Warning in postMDS(out$points, dis, plot = max(0, plot - 1), ...): skipping
## half-change scaling: too few points below threshold
NMDS_analysis
## 
## Call:
## metaMDS(comm = vegan_matrix_18S, k = 2, trymax = 50) 
## 
## global Multidimensional Scaling using monoMDS
## 
## Data:     wisconsin(sqrt(vegan_matrix_18S)) 
## Distance: bray 
## 
## Dimensions: 2 
## Stress:     0.105086 
## Stress type 1, weak ties
## Two convergent solutions found after 32 tries
## Scaling: centring, PC rotation 
## Species: expanded scores based on 'wisconsin(sqrt(vegan_matrix_18S))'
# Shepard plot: observed dissimilarities vs ordination distances (fit check).
stressplot(NMDS_analysis)

species.scores <- as.data.frame(scores(NMDS_analysis, "species"))  #Using the scores function from vegan to extract the species scores and convert to a data.frame
species.scores$species <- rownames(species.scores)  # create a column of species, represents OTU IDs
nmds_sp_scores <- merge(species.scores, tax_tab_18S,by="row.names", all=TRUE)
data.scores <- as.data.frame(scores(NMDS_analysis))  #Using the scores function from vegan to extract the site scores and convert to a data.frame
# Re-index the dendrogram metadata by sample label so site scores merge on row names.
rownames(tree_data_18S) <- tree_data_18S$label
nmds_scores <- merge(data.scores, tree_data_18S,by="row.names", all=TRUE)
#head(nmds_sp_scores)

Save the 18S NMDS data for later use.

# Stash the 18S NMDS scores under their own name (nmds_scores gets reused).
nmds_18S <- nmds_scores

Plot

# 18S NMDS: points colored by region (Description), shaped/labelled by Treatment.
ggplot() + 
  geom_point(data=nmds_18S,aes(x=NMDS1,y=NMDS2, colour=Description, shape=Treatment),size=7,alpha=0.9) + # add the point markers
  geom_text(data=nmds_18S,aes(x=NMDS1,y=NMDS2,label=Treatment),size=3,vjust=0.4) +  # add the site labels
  scale_colour_brewer(palette = "Dark2")+
  coord_equal() +
  theme_bw()

# Same plot with per-region t-distribution ellipses (dashed outline + shaded fill).
ggplot() + 
  stat_ellipse(data=nmds_18S,aes(x=NMDS1,y=NMDS2, colour=Description),type = "t", linetype=2) +
  stat_ellipse(data=nmds_18S,aes(x=NMDS1,y=NMDS2, fill=Description),geom= "polygon", alpha=0.2) +
  geom_point(data=nmds_18S,aes(x=NMDS1,y=NMDS2, colour=Description, shape=Treatment),size=7,alpha=0.9) + # add the point markers
  geom_text(data=nmds_18S,aes(x=NMDS1,y=NMDS2,label=Treatment),size=3,vjust=0.4) +  # add the site labels
  scale_colour_brewer(palette = "Dark2")+
  scale_fill_brewer(palette = "Dark2")+
  coord_equal() +
  theme_bw()

9 Procrustes Analysis

Plot 18S and COI

# Side-by-side reference plots of the two ordinations, labelled by site.
ggplot() + 
  geom_point(data=nmds_18S,aes(x=NMDS1,y=NMDS2, colour=Description, shape=Treatment),size=7,alpha=0.9) + # add the point markers
  geom_text(data=nmds_18S,aes(x=NMDS1,y=NMDS2,label=site),size=3,vjust=0.4) +  # add the site labels
  scale_colour_brewer(palette = "Dark2")+
  coord_equal() +
  theme_bw()+ labs(title = "18S")

ggplot() + 
  geom_point(data=nmds_COI,aes(x=NMDS1,y=NMDS2, colour=Description, shape=Treatment),size=7,alpha=0.9) + # add the point markers
  geom_text(data=nmds_COI,aes(x=NMDS1,y=NMDS2,label=site),size=3,vjust=0.4) +  # add the site labels
  scale_colour_brewer(palette = "Dark2")+
  coord_equal() +
  theme_bw()+ labs(title = "COI")

#Procrustes Analysis

# X = target (18S) configuration, Y = COI configuration to be rotated/scaled
# onto X; rows are matched by the sample names assigned below.
X <- subset(nmds_18S, select=c("NMDS1", "NMDS2"))
row.names(X) <- nmds_18S$Row.names
Y <- subset(nmds_COI, select=c("NMDS1", "NMDS2"))
row.names(Y) <- nmds_COI$Row.names
test <- procrustes(X , Y, scale = TRUE, symmetric = FALSE, scores = "sites")

# kind=1: overlay of the two configurations with displacement segments.
test1 <- plot(test, kind=1)

# kind=2: per-sample Procrustes residuals.
plot(test, kind=2)

summary(test)
## 
## Call:
## procrustes(X = X, Y = Y, scale = TRUE, symmetric = FALSE, scores = "sites") 
## 
## Number of objects: 20    Number of dimensions: 2 
## 
## Procrustes sum of squares:  
##  0.5384057 
## Procrustes root mean squared error: 
##  0.164074 
## Quantiles of Procrustes errors:
##        Min         1Q     Median         3Q        Max 
## 0.05063788 0.08433838 0.11554570 0.18095496 0.37421122 
## 
## Rotation matrix:
##             [,1]       [,2]
## [1,]  0.99975108 0.02231081
## [2,] -0.02231081 0.99975108
## 
## Translation of averages:
##              [,1]          [,2]
## [1,] 3.186301e-18 -3.878791e-18
## 
## Scaling of target:
## [1] 0.9334968
residuals(test)
##       CP23     CP23_1     CP23_2      GOC2a      GOC2b        UC1 
## 0.05188261 0.09006798 0.08920954 0.14390611 0.17572225 0.20573732 
##       UC10       UC12       UC13       UC14       UC15        UC2 
## 0.06833289 0.21032866 0.19665309 0.06212232 0.11179537 0.10184002 
##        UC3      UC3_1      UC3_2        UC4        UC5        UC6 
## 0.11975239 0.11929604 0.06972490 0.32070985 0.11147780 0.37421122 
##        UC7        UC9 
## 0.05063788 0.14179347

Plot with colors

# "heads" = target (18S) coordinates; "points" = rotated COI coordinates.
# NOTE(review): Description is attached by position from nmds_COI -- this
# assumes nmds_COI rows are in the same order as the procrustes scores; verify.
heads.scores <- as.data.frame(scores(test1$heads))
heads.scores$label <- rownames(heads.scores)
heads.scores$Description <- nmds_COI[,c("Description")]

points.scores <- as.data.frame(scores(test1$points))
points.scores$label <- rownames(points.scores)
points.scores$Description <- nmds_COI[,c("Description")]

cbPalette <- c("#56B4E9", "#E69F00", "#009E73")
ggplot() + 
  # circles = 18S positions, triangles = rotated COI positions
  geom_point(data=heads.scores,aes(x=NMDS1,y=NMDS2, colour=Description),shape=19,size=6,alpha=0.9) +
  geom_point(data=points.scores,aes(x=Dim1,y=Dim2, colour=Description),shape=17,size=6,alpha=0.9) +  # add the site labels
  geom_text(data=heads.scores,aes(x=NMDS1,y=NMDS2,label=label),size=3,vjust=2) +  # add the site labels
  # connect each sample's two positions to show displacement
  geom_segment(aes(xend = heads.scores$NMDS1,yend = heads.scores$NMDS2,x = points.scores$Dim1,y = points.scores$Dim2),colour='black',alpha=0.6, size=0.2) +
  scale_colour_manual(values=cbPalette) +
  coord_equal() +
  theme_bw() + labs(title = "GOC clusters: ")

Plot with arrows

# Rebuild the score data frames (same as the chunk above).
# NOTE(review): Description is again attached by position from nmds_COI --
# assumes row order matches the procrustes scores; verify.
heads.scores <- as.data.frame(scores(test1$heads))
heads.scores$label <- rownames(heads.scores)
heads.scores$Description <- nmds_COI[,c("Description")]

points.scores <- as.data.frame(scores(test1$points))
points.scores$label <- rownames(points.scores)
points.scores$Description <- nmds_COI[,c("Description")]

# Plot with colors
# Write the colored Procrustes overlay to SVG.
svglite::svglite("Figures/Procrustes_COI_18S_Banzai.svg", height=6, width=6)
cbPalette <- c("#56B4E9", "#E69F00", "#009E73")
ggplot() + 
  geom_point(data=heads.scores,aes(x=NMDS1,y=NMDS2, colour=Description),shape=19,size=4,alpha=0.9) +
  geom_point(data=points.scores,aes(x=Dim1,y=Dim2, colour=Description),shape=17,size=4,alpha=0.9) +  # add the site labels
  geom_text(data=heads.scores,aes(x=NMDS1,y=NMDS2,label=label),size=3,vjust=2) +  # add the site labels
  geom_segment(aes(xend = heads.scores$NMDS1,yend = heads.scores$NMDS2,x = points.scores$Dim1,y = points.scores$Dim2),colour='black',alpha=0.6, size=0.2) +
  scale_colour_manual(values=cbPalette) +
  coord_equal() +
  theme_bw() + labs(title = "GOC clusters: ")
dev.off()
## quartz_off_screen 
##                 2
# Plot with Arrows
# Inline version: arrows point from the rotated COI position to the 18S target.
ggplot() + 
  geom_point(data=heads.scores,aes(x=NMDS1,y=NMDS2),size=3,alpha=0.9) +
  geom_point(data=points.scores,aes(x=Dim1,y=Dim2),colour='red',size=3) +  # add the site labels
  geom_text(data=heads.scores,aes(x=NMDS1,y=NMDS2,label=label),size=3,vjust=1.8) +  # add the site labels
  geom_segment(aes(xend = heads.scores$NMDS1,yend = heads.scores$NMDS2,x = points.scores$Dim1,y = points.scores$Dim2),colour='black',arrow=arrow(length=unit(0.30,"cm")), size=0.5) +
  coord_equal() +
  theme_bw() + labs(title = "GOC clusters: ")

10 PERMANOVA

10.1 18S

# ADONIS test

# Calculate bray curtis distance matrix
B_bray <- phyloseq::distance(GOC_18S_r, method = "bray")

# make a data frame from the sample_data
sampledf <- data.frame(sample_data(GOC_18S_r))

# ADONIS calc
# PERMANOVA: terms are added sequentially, so Description is fit before
# Time_of_Day.
# NOTE(review): adonis() is superseded by adonis2() in vegan >= 2.6; it
# exists in the vegan 2.5-5 loaded here.
adonis_18S = adonis(B_bray ~ Description + Time_of_Day, data = sampledf)
adonis_18S
## 
## Call:
## adonis(formula = B_bray ~ Description + Time_of_Day, data = sampledf) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##             Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## Description  2    2.5374 1.26869  8.9887 0.50225  0.001 ***
## Time_of_Day  2    0.3975 0.19876  1.4082 0.07868  0.197    
## Residuals   15    2.1171 0.14114         0.41907           
## Total       19    5.0520                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Homogeneity of dispersion test
# betadisper + permutest check PERMANOVA's equal-dispersion assumption for
# the Time_of_Day groups (pairwise comparisons included).
model<-betadisper(B_bray,sampledf$Time_of_Day)
permutest(model, pairwise = TRUE)
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df  Sum Sq  Mean Sq      F N.Perm Pr(>F)  
## Groups     2 0.17500 0.087502 4.2071    999   0.02 *
## Residuals 17 0.35357 0.020798                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
##                 day    night transition
## day                 0.117000      0.300
## night      0.131894               0.017
## transition 0.286830 0.018209
plot(model)

boxplot(model)

# Repeat the dispersion test for the Description (region) grouping.
model<-betadisper(B_bray,sampledf$Description)
permutest(model, pairwise = TRUE)
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df  Sum Sq  Mean Sq      F N.Perm Pr(>F)    
## Groups     2 0.23383 0.116916 19.468    999  0.001 ***
## Residuals 17 0.10209 0.006005                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
##            EUNorth    EUSouth PCNorth
## EUNorth            2.0000e-03   0.058
## EUSouth 5.5636e-03              0.001
## PCNorth 6.3044e-02 2.0061e-05
plot(model)

boxplot(model)

10.2 COI

# ADONIS test

# Calculate bray curtis distance matrix
B_bray <- phyloseq::distance(GOC_COI_r, method = "bray")

# make a data frame from the sample_data
sampledf <- data.frame(sample_data(GOC_COI_r))

# ADONIS calc
# PERMANOVA: terms are added sequentially, so Description is fit before
# Time_of_Day.
# NOTE(review): adonis() is superseded by adonis2() in vegan >= 2.6; it
# exists in the vegan 2.5-5 loaded here.
adonis_COI = adonis(B_bray ~ Description + Time_of_Day, data = sampledf)
adonis_COI
## 
## Call:
## adonis(formula = B_bray ~ Description + Time_of_Day, data = sampledf) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##             Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## Description  2    2.5379 1.26897  5.1277 0.37345  0.001 ***
## Time_of_Day  2    0.5458 0.27291  1.1028 0.08032  0.323    
## Residuals   15    3.7121 0.24747         0.54623           
## Total       19    6.7959                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Homogeneity of dispersion test
# betadisper + permutest check PERMANOVA's equal-dispersion assumption for
# the Time_of_Day groups (pairwise comparisons included).
model<-betadisper(B_bray,sampledf$Time_of_Day)
permutest(model, pairwise = TRUE)
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df  Sum Sq  Mean Sq      F N.Perm Pr(>F)   
## Groups     2 0.19524 0.097618 5.7199    999  0.004 **
## Residuals 17 0.29013 0.017066                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
##                  day     night transition
## day                  0.0620000      0.247
## night      0.0664924                0.008
## transition 0.2422758 0.0057941
plot(model)

boxplot(model)

# Repeat the dispersion test for the Description (region) grouping.
model<-betadisper(B_bray,sampledf$Description)
permutest(model, pairwise = TRUE)
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df  Sum Sq Mean Sq      F N.Perm Pr(>F)    
## Groups     2 0.27341 0.13670 18.301    999  0.001 ***
## Residuals 17 0.12699 0.00747                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
##            EUNorth    EUSouth PCNorth
## EUNorth            4.0000e-03   0.185
## EUSouth 1.2687e-03              0.001
## PCNorth 1.9229e-01 2.8486e-05
plot(model)

boxplot(model)

11 Package citations

citation("ggplot2")
## 
## To cite ggplot2 in publications, please use:
## 
##   H. Wickham. ggplot2: Elegant Graphics for Data Analysis.
##   Springer-Verlag New York, 2016.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Book{,
##     author = {Hadley Wickham},
##     title = {ggplot2: Elegant Graphics for Data Analysis},
##     publisher = {Springer-Verlag New York},
##     year = {2016},
##     isbn = {978-3-319-24277-4},
##     url = {https://ggplot2.tidyverse.org},
##   }
citation("clustsig")
## 
## To cite package 'clustsig' in publications use:
## 
##   Douglas Whitaker and Mary Christman (2014). clustsig:
##   Significant Cluster Analysis. R package version 1.1.
##   https://CRAN.R-project.org/package=clustsig
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {clustsig: Significant Cluster Analysis},
##     author = {Douglas Whitaker and Mary Christman},
##     year = {2014},
##     note = {R package version 1.1},
##     url = {https://CRAN.R-project.org/package=clustsig},
##   }
## 
## ATTENTION: This citation information has been auto-generated from
## the package DESCRIPTION file and may need manual editing, see
## 'help("citation")'.
citation("phyloseq")
## 
## To cite phyloseq in publications, or otherwise credit, please use:
## 
##   phyloseq: An R package for reproducible interactive analysis and
##   graphics of microbiome census data. Paul J. McMurdie and Susan
##   Holmes (2013) PLoS ONE 8(4):e61217.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Article{,
##     author = {Paul J. McMurdie and Susan Holmes},
##     journal = {PLoS ONE},
##     pages = {e61217},
##     title = {phyloseq: An R package for reproducible interactive analysis and graphics of microbiome census data},
##     volume = {8},
##     number = {4},
##     year = {2013},
##     url = {http://dx.plos.org/10.1371/journal.pone.0061217},
##   }
# Citation for vegan (community-ecology ordination and permutation tests).
citation("vegan")
## 
## To cite package 'vegan' in publications use:
## 
##   Jari Oksanen, F. Guillaume Blanchet, Michael Friendly, Roeland
##   Kindt, Pierre Legendre, Dan McGlinn, Peter R. Minchin, R. B.
##   O'Hara, Gavin L. Simpson, Peter Solymos, M. Henry H. Stevens,
##   Eduard Szoecs and Helene Wagner (2019). vegan: Community Ecology
##   Package. R package version 2.5-5.
##   https://CRAN.R-project.org/package=vegan
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {vegan: Community Ecology Package},
##     author = {Jari Oksanen and F. Guillaume Blanchet and Michael Friendly and Roeland Kindt and Pierre Legendre and Dan McGlinn and Peter R. Minchin and R. B. O'Hara and Gavin L. Simpson and Peter Solymos and M. Henry H. Stevens and Eduard Szoecs and Helene Wagner},
##     year = {2019},
##     note = {R package version 2.5-5},
##     url = {https://CRAN.R-project.org/package=vegan},
##   }
## 
## ATTENTION: This citation information has been auto-generated from
## the package DESCRIPTION file and may need manual editing, see
## 'help("citation")'.
# Citation for dplyr (data manipulation verbs used in the analysis).
citation("dplyr")
## 
## To cite package 'dplyr' in publications use:
## 
##   Hadley Wickham, Romain François, Lionel Henry and Kirill Müller
##   (2019). dplyr: A Grammar of Data Manipulation. R package version
##   0.8.3. https://CRAN.R-project.org/package=dplyr
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {dplyr: A Grammar of Data Manipulation},
##     author = {Hadley Wickham and Romain François and Lionel Henry and Kirill Müller},
##     year = {2019},
##     note = {R package version 0.8.3},
##     url = {https://CRAN.R-project.org/package=dplyr},
##   }
# Citation for biomformat (reading/writing BIOM-format feature tables).
citation("biomformat")
## 
## To cite package 'biomformat' in publications use:
## 
##   Paul J. McMurdie and Joseph N Paulson (2019). biomformat: An
##   interface package for the BIOM file format.
##   https://github.com/joey711/biomformat/, http://biom-format.org/.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {biomformat: An interface package for the BIOM file format},
##     author = {Paul J. McMurdie and Joseph N Paulson},
##     year = {2019},
##     note = {https://github.com/joey711/biomformat/, http://biom-format.org/},
##   }
## 
## ATTENTION: This citation information has been auto-generated from
## the package DESCRIPTION file and may need manual editing, see
## 'help("citation")'.
# Citation for ggdendro (ggplot2-based dendrogram drawing).
citation("ggdendro")
## 
## To cite package 'ggdendro' in publications use:
## 
##   Andrie de Vries and Brian D. Ripley (2016). ggdendro: Create
##   Dendrograms and Tree Diagrams Using 'ggplot2'. R package version
##   0.1-20. https://CRAN.R-project.org/package=ggdendro
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {ggdendro: Create Dendrograms and Tree Diagrams Using 'ggplot2'},
##     author = {Andrie {de Vries} and Brian D. Ripley},
##     year = {2016},
##     note = {R package version 0.1-20},
##     url = {https://CRAN.R-project.org/package=ggdendro},
##   }
# Citation for ggpubr (publication-ready ggplot2 helpers).
citation("ggpubr")
## 
## To cite package 'ggpubr' in publications use:
## 
##   Alboukadel Kassambara (2019). ggpubr: 'ggplot2' Based
##   Publication Ready Plots. R package version 0.2.1.
##   https://CRAN.R-project.org/package=ggpubr
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {ggpubr: 'ggplot2' Based Publication Ready Plots},
##     author = {Alboukadel Kassambara},
##     year = {2019},
##     note = {R package version 0.2.1},
##     url = {https://CRAN.R-project.org/package=ggpubr},
##   }
# Citation for car (companion to applied regression; ANOVA utilities).
citation("car")
## 
## To cite the car package in publications use:
## 
##   John Fox and Sanford Weisberg (2019). An {R} Companion to
##   Applied Regression, Third Edition. Thousand Oaks CA: Sage. URL:
##   https://socialsciences.mcmaster.ca/jfox/Books/Companion/
## 
## A BibTeX entry for LaTeX users is
## 
##   @Book{,
##     title = {An {R} Companion to Applied Regression},
##     edition = {Third},
##     author = {John Fox and Sanford Weisberg},
##     year = {2019},
##     publisher = {Sage},
##     address = {Thousand Oaks {CA}},
##     url = {https://socialsciences.mcmaster.ca/jfox/Books/Companion/},
##   }
# Citation for multcomp (simultaneous inference / multiple comparisons).
citation("multcomp")
## 
## Please cite the multcomp package by the following reference:
## 
##   Torsten Hothorn, Frank Bretz and Peter Westfall (2008).
##   Simultaneous Inference in General Parametric Models. Biometrical
##   Journal 50(3), 346--363.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Article{,
##     title = {Simultaneous Inference in General Parametric Models},
##     author = {Torsten Hothorn and Frank Bretz and Peter Westfall},
##     journal = {Biometrical Journal},
##     year = {2008},
##     volume = {50},
##     number = {3},
##     pages = {346--363},
##   }